


<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
  <meta charset="utf-8">
  <meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />

  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  
  <title>Engine Caching &mdash; Torch-TensorRT v2.10.0.dev0+2e6843e documentation</title>
  

  
  
  
  

  

  
  
    

  

  <link rel="stylesheet" href="../../../_static/pygments.css" type="text/css" />
  <link rel="stylesheet" href="../../../_static/css/theme.css" type="text/css" />
  <link rel="stylesheet" href="../../../_static/sg_gallery.css" type="text/css" />
  <link rel="stylesheet" href="../../../_static/sg_gallery-binder.css" type="text/css" />
  <link rel="stylesheet" href="../../../_static/sg_gallery-dataframe.css" type="text/css" />
  <link rel="stylesheet" href="../../../_static/sg_gallery-rendered-html.css" type="text/css" />
  <link rel="stylesheet" href="../../../_static/collapsible-lists/css/tree_view.css" type="text/css" />
  <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.10.0-beta/dist/katex.min.css" type="text/css" />
  <link rel="stylesheet" href="../../../_static/css/custom.css" type="text/css" />
    <link rel="index" title="Index" href="../../../genindex.html" />
    <link rel="search" title="Search" href="../../../search.html" />
    <link rel="next" title="Engine Caching (BERT)" href="engine_caching_bert_example.html" />
    <link rel="prev" title="Deploy Quantized Models using Torch-TensorRT" href="vgg16_ptq.html" />
  <!-- Google Tag Manager bootstrap: pushes a 'gtm.start' timestamp event onto
       window.dataLayer, then creates an async script element for gtm.js and inserts
       it ahead of the first script element in the document.
       NOTE(review): the container ID (last argument of the IIFE) is an empty string,
       so the request URL ends in "gtm.js?id=" with no ID. Confirm whether a GTM
       container ID was meant to be templated in here. -->
    <script>(function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':
    new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0],
    j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src=
    'https://www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f);
    })(window,document,'script','dataLayer','');</script>
    <!-- End Google Tag Manager -->
  

  
  <script src="../../../_static/js/modernizr.min.js"></script>

  <!-- Preload the theme fonts -->

<link rel="preload" href="../../../_static/fonts/FreightSans/freight-sans-book.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="../../../_static/fonts/FreightSans/freight-sans-medium.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="../../../_static/fonts/IBMPlexMono/IBMPlexMono-Medium.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="../../../_static/fonts/FreightSans/freight-sans-bold.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="../../../_static/fonts/FreightSans/freight-sans-medium-italic.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="../../../_static/fonts/IBMPlexMono/IBMPlexMono-SemiBold.woff2" as="font" type="font/woff2" crossorigin="anonymous">

<!-- Preload the katex fonts -->

<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Math-Italic.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Main-Regular.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Main-Bold.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Size1-Regular.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Size4-Regular.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Size2-Regular.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Size3-Regular.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Caligraphic-Regular.woff2" as="font" type="font/woff2" crossorigin="anonymous">
  <link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.15.2/css/all.css" integrity="sha384-vSIIfh2YWi9wW0r9iZe7RJPrKwp6bG+s9QZMoITbCckVJqGCCRhc+ccxNcdpHuYu" crossorigin="anonymous">
</head>

<div class="container-fluid header-holder tutorials-header" id="header-holder">
  <div class="container">
    <div class="header-container">
      <a class="header-logo" href="https://pytorch.org/" aria-label="PyTorch"></a>

      <div class="main-menu">
        <ul>

          <li class="main-menu-item">
          <div id="resourcesDropdownButton" data-toggle="resources-dropdown" class="resources-dropdown">
              <a class="with-down-arrow">
                Learn
              </a>
              <div class="resources-dropdown-menu">
                <a class="nav-dropdown-item" href="https://pytorch.org/get-started">
                  <span class="dropdown-title">Get Started</span>
                  <p>Run PyTorch locally or get started quickly with one of the supported cloud platforms</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/tutorials">
                  <span class="dropdown-title">Tutorials</span>
                  <p>What's new in PyTorch tutorials</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/tutorials/beginner/basics/intro.html">
                  <span class="dropdown-title">Learn the Basics</span>
                  <p>Familiarize yourself with PyTorch concepts and modules</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/tutorials/recipes/recipes_index.html">
                  <span class="dropdown-title">PyTorch Recipes</span>
                  <p>Bite-size, ready-to-deploy PyTorch code examples</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/tutorials/beginner/introyt.html">
                  <span class="dropdown-title">Intro to PyTorch - YouTube Series</span>
                  <p>Master PyTorch basics with our engaging YouTube tutorial series</p>
                </a>
              </div>
            </div>
          </li>

          <li>
          <div id="resourcesDropdownButton" data-toggle="resources-dropdown" class="resources-dropdown">
              <a class="with-down-arrow">
                Ecosystem
              </a>
              <div class="resources-dropdown-menu">
                <a class="nav-dropdown-item" href="https://pytorch.org/ecosystem">
                  <span class="dropdown-title">Tools</span>
                  <p>Learn about the tools and frameworks in the PyTorch Ecosystem</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/#community-module">
                  <span class="dropdown-title">Community</span>
                  <p>Join the PyTorch developer community to contribute, learn, and get your questions answered</p>
                </a>
                <a class="nav-dropdown-item" href="https://discuss.pytorch.org/" target="_blank" rel="noopener">
                  <span class="dropdown-title">Forums</span>
                  <p>A place to discuss PyTorch code, issues, install, research</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/resources">
                  <span class="dropdown-title">Developer Resources</span>
                  <p>Find resources and get questions answered</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/ecosystem/contributor-awards-2024">
                  <span class="dropdown-title">Contributor Awards - 2024</span>
                  <p>Award winners announced at this year's PyTorch Conference</p>
                </a>
              </div>
            </div>
          </li>

          <li>
          <div id="resourcesDropdownButton" data-toggle="resources-dropdown" class="resources-dropdown">
              <a class="with-down-arrow">
                Edge
              </a>
              <div class="resources-dropdown-menu">
                <a class="nav-dropdown-item" href="https://pytorch.org/edge">
                  <span class="dropdown-title">About PyTorch Edge</span>
                  <p>Build innovative and privacy-aware AI experiences for edge devices</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/executorch-overview">
                  <span class="dropdown-title">ExecuTorch</span>
                  <p>End-to-end solution for enabling on-device inference capabilities across mobile and edge devices</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/executorch/stable/index.html">
                  <span class="dropdown-title">ExecuTorch Docs</span>
                </a>
              </div>
            </div>  
          </li>

          <li class="main-menu-item">
            <div id="resourcesDropdownButton" data-toggle="resources-dropdown" class="resources-dropdown">
              <a class="with-down-arrow">
                Docs
              </a>
              <div class="resources-dropdown-menu">
                <a class="nav-dropdown-item" href="https://pytorch.org/docs/stable/index.html">
                  <span class="dropdown-title">PyTorch</span>
                  <p>Explore the documentation for comprehensive guidance on how to use PyTorch</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/pytorch-domains">
                  <span class="dropdown-title">PyTorch Domains</span>
                  <p>Read the PyTorch Domains documentation to learn more about domain-specific libraries</p>
                </a>
              </div>
            </div>
          </li>

          <li>
            <div id="resourcesDropdownButton" data-toggle="resources-dropdown" class="resources-dropdown">
              <a class="with-down-arrow">
                Blogs &amp; News
              </a>
              <div class="resources-dropdown-menu">
                <a class="nav-dropdown-item" href="https://pytorch.org/blog/">
                  <span class="dropdown-title">PyTorch Blog</span>
                  <p>Catch up on the latest technical news and happenings</p>
                </a>
                 <a class="nav-dropdown-item" href="https://pytorch.org/community-blog">
                  <span class="dropdown-title">Community Blog</span>
                  <p>Stories from the PyTorch ecosystem</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/videos">
                  <span class="dropdown-title">Videos</span>
                  <p>Learn about the latest PyTorch tutorials, news, and more</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/community-stories">
                  <span class="dropdown-title">Community Stories</span>
                  <p>Learn how our community solves real, everyday machine learning problems with PyTorch</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/events">
                  <span class="dropdown-title">Events</span>
                  <p>Find events, webinars, and podcasts</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/newsletter">
                  <span class="dropdown-title">Newsletter</span>
                  <p>Stay up-to-date with the latest updates</p>
                </a>
              </div>
            </div>
          </li>

          <li>
            <div id="resourcesDropdownButton" data-toggle="resources-dropdown" class="resources-dropdown">
              <a class="with-down-arrow">
                About
              </a>
              <div class="resources-dropdown-menu">
                <a class="nav-dropdown-item" href="https://pytorch.org/foundation">
                  <span class="dropdown-title">PyTorch Foundation</span>
                  <p>Learn more about the PyTorch Foundation</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/governing-board">
                  <span class="dropdown-title">Governing Board</span>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/credits">
                  <span class="dropdown-title">Cloud Credit Program</span>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/tac">
                  <span class="dropdown-title">Technical Advisory Council</span>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/staff">
                  <span class="dropdown-title">Staff</span>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/contact-us">
                  <span class="dropdown-title">Contact Us</span>
                </a>
              </div>
            </div>
          </li>

          <li class="main-menu-item">
            <div class="no-dropdown">
              <a href="https://pytorch.org/join" data-cta="join">
                Become a Member
              </a>
            </div>
          </li>
          <li>
           <div class="main-menu-item">
             <a href="https://github.com/pytorch/pytorch" class="github-icon" aria-label="PyTorch on GitHub">
             </a>
           </div>
          </li>
          <!--- TODO: This block adds the search icon to the nav bar. We will enable it later. 
          <li>
            <div class="main-menu-item">
             <a href="https://github.com/pytorch/pytorch" class="search-icon">
             </a>
            </div>
          </li>
          --->
        </ul>
      </div>

      <a class="main-menu-open-button" href="#" data-behavior="open-mobile-menu" aria-label="Open menu"></a>
    </div>
  </div>
</div>

<body class="pytorch-body">

   

    

    <div class="table-of-contents-link-wrapper">
      <span>Table of Contents</span>
      <a href="#" class="toggle-table-of-contents" data-behavior="toggle-table-of-contents" aria-label="Toggle table of contents"></a>
    </div>

    <nav data-toggle="wy-nav-shift" class="pytorch-left-menu" id="pytorch-left-menu">
      <div class="pytorch-side-scroll">
        <div class="pytorch-menu pytorch-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
          <div class="pytorch-left-menu-search">
            

            
              
              
                <div class="version">
                  v2.10.0.dev0+2e6843e
                </div>
              
            

            


  


<div role="search">
  <form id="rtd-search-form" class="wy-form" action="../../../search.html" method="get">
    <input type="text" name="q" placeholder="Search Docs" aria-label="Search Docs" />
    <input type="hidden" name="check_keywords" value="yes" />
    <input type="hidden" name="area" value="default" />
  </form>
</div>

            
          </div>

          
            
            
              
            
            
              <p class="caption" role="heading"><span class="caption-text">Getting Started</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../getting_started/installation.html">Installation</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../getting_started/jetpack.html">Torch-TensorRT in JetPack</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../getting_started/quick_start.html">Quick Start</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../getting_started/capture_and_replay.html">Introduction</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">User Guide</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../user_guide/torch_tensorrt_explained.html">Torch-TensorRT Explained</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../user_guide/dynamic_shapes.html">Dynamic shapes with Torch-TensorRT</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../user_guide/saving_models.html">Saving models compiled with Torch-TensorRT</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../user_guide/runtime.html">Deploying Torch-TensorRT Programs</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../user_guide/using_dla.html">DLA</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../user_guide/mixed_precision.html">Compile Mixed Precision models with Torch-TensorRT</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Tutorials</span></p>
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="torch_compile_advanced_usage.html">Torch Compile Advanced Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="vgg16_ptq.html">Deploy Quantized Models using Torch-TensorRT</a></li>
<li class="toctree-l1 current"><a class="current reference internal" href="#">Engine Caching</a></li>
<li class="toctree-l1"><a class="reference internal" href="engine_caching_bert_example.html">Engine Caching (BERT)</a></li>
<li class="toctree-l1"><a class="reference internal" href="refit_engine_example.html">Refitting Torch-TensorRT Programs with New Weights</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../serving_torch_tensorrt_with_triton.html">Serving a Torch-TensorRT model with Triton</a></li>
<li class="toctree-l1"><a class="reference internal" href="torch_export_cudagraphs.html">Torch Export with Cudagraphs</a></li>
<li class="toctree-l1"><a class="reference internal" href="converter_overloading.html">Overloading Torch-TensorRT Converters with Custom Converters</a></li>
<li class="toctree-l1"><a class="reference internal" href="custom_kernel_plugins.html">Using Custom Kernels within TensorRT Engines with Torch-TensorRT</a></li>
<li class="toctree-l1"><a class="reference internal" href="auto_generate_converters.html">Automatically Generate a Converter for a Custom Kernel</a></li>
<li class="toctree-l1"><a class="reference internal" href="auto_generate_plugins.html">Automatically Generate a Plugin for a Custom Kernel</a></li>
<li class="toctree-l1"><a class="reference internal" href="mutable_torchtrt_module_example.html">Mutable Torch TensorRT Module</a></li>
<li class="toctree-l1"><a class="reference internal" href="weight_streaming_example.html">Weight Streaming</a></li>
<li class="toctree-l1"><a class="reference internal" href="pre_allocated_output_example.html">Pre-allocated output buffer</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Dynamo Frontend</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../dynamo/torch_compile.html">TensorRT Backend for <code class="docutils literal notranslate"><span class="pre">torch.compile</span></code></a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dynamo/dynamo_export.html">Compiling Exported Programs with Torch-TensorRT</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">TorchScript Frontend</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../ts/creating_torchscript_module_in_python.html">Creating a TorchScript Module</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../ts/creating_torchscript_module_in_python.html#working-with-torchscript-in-python">Working with TorchScript in Python</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../ts/creating_torchscript_module_in_python.html#saving-torchscript-module-to-disk">Saving TorchScript Module to Disk</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../ts/getting_started_with_python_api.html">Using Torch-TensorRT in Python</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../ts/getting_started_with_cpp_api.html">Using Torch-TensorRT in  C++</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../ts/ptq.html">Post Training Quantization (PTQ)</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">FX Frontend</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../fx/getting_started_with_fx_path.html">Torch-TensorRT (FX Frontend) User Guide</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Model Zoo</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="torch_compile_resnet_example.html">Compiling ResNet with dynamic shapes using the <cite>torch.compile</cite> backend</a></li>
<li class="toctree-l1"><a class="reference internal" href="torch_compile_transformers_example.html">Compiling BERT using the <cite>torch.compile</cite> backend</a></li>
<li class="toctree-l1"><a class="reference internal" href="torch_compile_stable_diffusion.html">Compiling Stable Diffusion model using the <cite>torch.compile</cite> backend</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../compile_hf_models.html">Compiling LLM models from Huggingface</a></li>
<li class="toctree-l1"><a class="reference internal" href="torch_compile_gpt2.html">Compiling GPT2 using the Torch-TensorRT <code class="docutils literal notranslate"><span class="pre">torch.compile</span></code> frontend</a></li>
<li class="toctree-l1"><a class="reference internal" href="torch_export_sam2.html">Compiling SAM2 using the dynamo backend</a></li>
<li class="toctree-l1"><a class="reference internal" href="torch_export_flux_dev.html">Compiling FLUX.1-dev model using the Torch-TensorRT dynamo backend</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../notebooks.html">Legacy notebooks</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Python API Documentation</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../py_api/torch_tensorrt.html">torch_tensorrt</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../py_api/dynamo.html">torch_tensorrt.dynamo</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../py_api/logging.html">torch_tensorrt.logging</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../py_api/fx.html">torch_tensorrt.fx</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../py_api/ts.html">torch_tensorrt.ts</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../py_api/ptq.html">torch_tensorrt.ts.ptq</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">C++ API Documentation</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../_cpp_api/torch_tensort_cpp.html">Torch-TensorRT C++ API</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../_cpp_api/namespace_torch_tensorrt.html">Namespace torch_tensorrt</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../_cpp_api/namespace_torch_tensorrt__logging.html">Namespace torch_tensorrt::logging</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../_cpp_api/namespace_torch_tensorrt__torchscript.html">Namespace torch_tensorrt::torchscript</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">CLI Documentation</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../cli/torchtrtc.html">torchtrtc</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Contributor Documentation</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../contributors/system_overview.html">System Overview</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../contributors/dynamo_converters.html">Writing Dynamo Converters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../contributors/writing_dynamo_aten_lowering_passes.html">Writing Dynamo ATen Lowering Passes</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../contributors/ts_converters.html">Writing TorchScript Converters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../contributors/useful_links.html">Useful Links for Torch-TensorRT Development</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../contributors/resource_management.html">Resource Management</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Indices</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../indices/supported_ops.html">Operators Supported</a></li>
</ul>

            
          
        </div>
      </div>
    </nav>

    <div class="pytorch-container">
      <div class="pytorch-page-level-bar" id="pytorch-page-level-bar">
        <div class="pytorch-breadcrumbs-wrapper">
          















<div role="navigation" aria-label="breadcrumbs navigation">

  <ul class="pytorch-breadcrumbs">
    
      <li>
        <a href="../../../index.html">
          
            Docs
          
        </a> &gt;
      </li>

        
      <li>Engine Caching</li>
    
    
      <li class="pytorch-breadcrumbs-aside">
        
            
            <a href="../../../_sources/tutorials/_rendered_examples/dynamo/engine_caching_example.rst.txt" rel="nofollow"><img src="../../../_static/images/view-page-source-icon.svg" alt="View page source"></a>
          
        
      </li>
    
  </ul>

  
</div>
        </div>

        <div class="pytorch-shortcuts-wrapper" id="pytorch-shortcuts-wrapper">
          Shortcuts
        </div>
      </div>

      <section data-toggle="wy-nav-shift" id="pytorch-content-wrap" class="pytorch-content-wrap">
        <div class="pytorch-content-left">

        
          <!-- Google Tag Manager (noscript) -->
          <noscript><iframe src="https://www.googletagmanager.com/ns.html?id="
          height="0" width="0" style="display:none;visibility:hidden"></iframe></noscript>
          <!-- End Google Tag Manager (noscript) -->
          
          <div class="rst-content">
          
            <div role="main" class="main-content" itemscope="itemscope" itemtype="http://schema.org/Article">
             <article itemprop="articleBody" id="pytorch-article" class="pytorch-article">
              
  <div class="sphx-glr-download-link-note admonition note">
<p class="admonition-title">Note</p>
<p><a class="reference internal" href="#sphx-glr-download-tutorials-rendered-examples-dynamo-engine-caching-example-py"><span class="std std-ref">Go to the end</span></a>
to download the full example code</p>
</div>
<section class="sphx-glr-example-title" id="engine-caching">
<span id="engine-caching-example"></span><span id="sphx-glr-tutorials-rendered-examples-dynamo-engine-caching-example-py"></span><h1>Engine Caching<a class="headerlink" href="#engine-caching" title="Permalink to this heading">¶</a></h1>
<p>As model sizes increase, the cost of compilation will as well. With AOT methods
like <code class="docutils literal notranslate"><span class="pre">torch_tensorrt.dynamo.compile</span></code>, this cost is paid upfront. However, if the weights
change, the session ends, or you are using JIT methods like <code class="docutils literal notranslate"><span class="pre">torch.compile</span></code>,
graphs get re-compiled as they are invalidated, and this cost is paid repeatedly.
Engine caching is a way to mitigate this cost by saving constructed engines to disk
and re-using them when possible. This tutorial demonstrates how to use engine caching
with TensorRT in PyTorch. Engine caching can significantly speed up subsequent model
compilations by reusing previously built TensorRT engines.</p>
<p>We’ll explore two approaches:</p>
<blockquote>
<div><ol class="arabic simple">
<li><p>Using torch_tensorrt.dynamo.compile</p></li>
<li><p>Using torch.compile with the TensorRT backend</p></li>
</ol>
</div></blockquote>
<p>The example uses a pre-trained ResNet18 model and shows the
differences between compilation without caching, with caching enabled,
and when reusing cached engines.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span><span class="w"> </span><span class="nn">os</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">typing</span><span class="w"> </span><span class="kn">import</span> <span class="n">Dict</span><span class="p">,</span> <span class="n">Optional</span>

<span class="kn">import</span><span class="w"> </span><span class="nn">numpy</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">np</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">torch</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">torch_tensorrt</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">torch_trt</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">torchvision.models</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">models</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">torch_tensorrt.dynamo._defaults</span><span class="w"> </span><span class="kn">import</span> <span class="n">TIMING_CACHE_PATH</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">torch_tensorrt.dynamo._engine_cache</span><span class="w"> </span><span class="kn">import</span> <span class="n">BaseEngineCache</span>

<span class="n">np</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">seed</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
<span class="n">torch</span><span class="o">.</span><span class="n">manual_seed</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>

<span class="n">model</span> <span class="o">=</span> <span class="n">models</span><span class="o">.</span><span class="n">resnet18</span><span class="p">(</span><span class="n">pretrained</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="s2">&quot;cuda&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">eval</span><span class="p">()</span>
<span class="n">enabled_precisions</span> <span class="o">=</span> <span class="p">{</span><span class="n">torch</span><span class="o">.</span><span class="n">float</span><span class="p">}</span>
<span class="n">min_block_size</span> <span class="o">=</span> <span class="mi">1</span>
<span class="n">use_python_runtime</span> <span class="o">=</span> <span class="kc">False</span>


<span class="k">def</span><span class="w"> </span><span class="nf">remove_timing_cache</span><span class="p">(</span><span class="n">path</span><span class="o">=</span><span class="n">TIMING_CACHE_PATH</span><span class="p">):</span>
    <span class="k">if</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">path</span><span class="p">):</span>
        <span class="n">os</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="n">path</span><span class="p">)</span>
</pre></div>
</div>
<section id="engine-caching-for-jit-compilation">
<h2>Engine Caching for JIT Compilation<a class="headerlink" href="#engine-caching-for-jit-compilation" title="Permalink to this heading">¶</a></h2>
<p>The primary goal of engine caching is to help speed up JIT workflows. <code class="docutils literal notranslate"><span class="pre">torch.compile</span></code>
provides a great deal of flexibility in model construction which makes it a good
first tool to try when looking to speed up your workflow. However, historically
the cost of compilation and in particular recompilation has been a barrier to entry
for many users. Prior to the addition of engine caching, if a subgraph was invalidated for some reason, that graph was reconstructed
from scratch. Now as engines are constructed, with <code class="docutils literal notranslate"><span class="pre">cache_built_engines=True</span></code>,
engines are saved to disk tied to a hash of their corresponding PyTorch subgraph. If the same subgraph is encountered
in a subsequent compilation, either as part of this session or a new session, the cache will
pull the built engine and <strong>refit</strong> the weights, which can reduce compilation times by orders of magnitude.
As such, in order to insert a new engine into the cache (i.e. <code class="docutils literal notranslate"><span class="pre">cache_built_engines=True</span></code>),
the engine must be refittable (<code class="docutils literal notranslate"><span class="pre">immutable_weights=False</span></code>). See <a class="reference internal" href="refit_engine_example.html#refit-engine-example"><span class="std std-ref">Refitting Torch-TensorRT Programs with New Weights</span></a> for more details.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">def</span><span class="w"> </span><span class="nf">torch_compile</span><span class="p">(</span><span class="n">iterations</span><span class="o">=</span><span class="mi">3</span><span class="p">):</span>
    <span class="n">times</span> <span class="o">=</span> <span class="p">[]</span>
    <span class="n">start</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">cuda</span><span class="o">.</span><span class="n">Event</span><span class="p">(</span><span class="n">enable_timing</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
    <span class="n">end</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">cuda</span><span class="o">.</span><span class="n">Event</span><span class="p">(</span><span class="n">enable_timing</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>

    <span class="c1"># The 1st iteration is to measure the compilation time without engine caching</span>
    <span class="c1"># The 2nd and 3rd iterations are to measure the compilation time with engine caching.</span>
    <span class="c1"># Since the 2nd iteration needs to compile and save the engine, it will be slower than the 1st iteration.</span>
    <span class="c1"># The 3rd iteration should be faster than the 1st iteration because it loads the cached engine.</span>
    <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
        <span class="n">inputs</span> <span class="o">=</span> <span class="p">[</span><span class="n">torch</span><span class="o">.</span><span class="n">rand</span><span class="p">((</span><span class="mi">100</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">224</span><span class="p">,</span> <span class="mi">224</span><span class="p">))</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="s2">&quot;cuda&quot;</span><span class="p">)]</span>
        <span class="c1"># remove timing cache and reset dynamo just for engine caching measurement</span>
        <span class="n">remove_timing_cache</span><span class="p">()</span>
        <span class="n">torch</span><span class="o">.</span><span class="n">_dynamo</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>

        <span class="k">if</span> <span class="n">i</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
            <span class="n">cache_built_engines</span> <span class="o">=</span> <span class="kc">False</span>
            <span class="n">reuse_cached_engines</span> <span class="o">=</span> <span class="kc">False</span>
        <span class="k">else</span><span class="p">:</span>
            <span class="n">cache_built_engines</span> <span class="o">=</span> <span class="kc">True</span>
            <span class="n">reuse_cached_engines</span> <span class="o">=</span> <span class="kc">True</span>

        <span class="n">start</span><span class="o">.</span><span class="n">record</span><span class="p">()</span>
        <span class="n">compiled_model</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span>
            <span class="n">model</span><span class="p">,</span>
            <span class="n">backend</span><span class="o">=</span><span class="s2">&quot;tensorrt&quot;</span><span class="p">,</span>
            <span class="n">options</span><span class="o">=</span><span class="p">{</span>
                <span class="s2">&quot;use_python_runtime&quot;</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span>
                <span class="s2">&quot;enabled_precisions&quot;</span><span class="p">:</span> <span class="n">enabled_precisions</span><span class="p">,</span>
                <span class="s2">&quot;min_block_size&quot;</span><span class="p">:</span> <span class="n">min_block_size</span><span class="p">,</span>
                <span class="s2">&quot;immutable_weights&quot;</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
                <span class="s2">&quot;cache_built_engines&quot;</span><span class="p">:</span> <span class="n">cache_built_engines</span><span class="p">,</span>
                <span class="s2">&quot;reuse_cached_engines&quot;</span><span class="p">:</span> <span class="n">reuse_cached_engines</span><span class="p">,</span>
            <span class="p">},</span>
        <span class="p">)</span>
        <span class="k">with</span> <span class="n">torch</span><span class="o">.</span><span class="n">no_grad</span><span class="p">():</span>
            <span class="n">compiled_model</span><span class="p">(</span><span class="o">*</span><span class="n">inputs</span><span class="p">)</span>  <span class="c1"># trigger the compilation</span>
        <span class="n">end</span><span class="o">.</span><span class="n">record</span><span class="p">()</span>
        <span class="n">torch</span><span class="o">.</span><span class="n">cuda</span><span class="o">.</span><span class="n">synchronize</span><span class="p">()</span>
        <span class="n">times</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">start</span><span class="o">.</span><span class="n">elapsed_time</span><span class="p">(</span><span class="n">end</span><span class="p">))</span>

    <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;----------------torch_compile----------------&quot;</span><span class="p">)</span>
    <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;disable engine caching, used:&quot;</span><span class="p">,</span> <span class="n">times</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="s2">&quot;ms&quot;</span><span class="p">)</span>
    <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;enable engine caching to cache engines, used:&quot;</span><span class="p">,</span> <span class="n">times</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span> <span class="s2">&quot;ms&quot;</span><span class="p">)</span>
    <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;enable engine caching to reuse engines, used:&quot;</span><span class="p">,</span> <span class="n">times</span><span class="p">[</span><span class="mi">2</span><span class="p">],</span> <span class="s2">&quot;ms&quot;</span><span class="p">)</span>


<span class="n">torch_compile</span><span class="p">()</span>
</pre></div>
</div>
</section>
<section id="engine-caching-for-aot-compilation">
<h2>Engine Caching for AOT Compilation<a class="headerlink" href="#engine-caching-for-aot-compilation" title="Permalink to this heading">¶</a></h2>
<p>Similarly to the JIT workflow, AOT workflows can benefit from engine caching.
As the same architecture or common subgraphs get recompiled, the cache will pull
previously built engines and refit the weights.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">def</span><span class="w"> </span><span class="nf">dynamo_compile</span><span class="p">(</span><span class="n">iterations</span><span class="o">=</span><span class="mi">3</span><span class="p">):</span>
    <span class="n">times</span> <span class="o">=</span> <span class="p">[]</span>
    <span class="n">start</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">cuda</span><span class="o">.</span><span class="n">Event</span><span class="p">(</span><span class="n">enable_timing</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
    <span class="n">end</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">cuda</span><span class="o">.</span><span class="n">Event</span><span class="p">(</span><span class="n">enable_timing</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>

    <span class="n">example_inputs</span> <span class="o">=</span> <span class="p">(</span><span class="n">torch</span><span class="o">.</span><span class="n">randn</span><span class="p">((</span><span class="mi">100</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">224</span><span class="p">,</span> <span class="mi">224</span><span class="p">))</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="s2">&quot;cuda&quot;</span><span class="p">),)</span>
    <span class="c1"># Mark the dim0 of inputs as dynamic</span>
    <span class="n">batch</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">export</span><span class="o">.</span><span class="n">Dim</span><span class="p">(</span><span class="s2">&quot;batch&quot;</span><span class="p">,</span> <span class="nb">min</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="nb">max</span><span class="o">=</span><span class="mi">200</span><span class="p">)</span>
    <span class="n">exp_program</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">export</span><span class="o">.</span><span class="n">export</span><span class="p">(</span>
        <span class="n">model</span><span class="p">,</span> <span class="n">args</span><span class="o">=</span><span class="n">example_inputs</span><span class="p">,</span> <span class="n">dynamic_shapes</span><span class="o">=</span><span class="p">{</span><span class="s2">&quot;x&quot;</span><span class="p">:</span> <span class="p">{</span><span class="mi">0</span><span class="p">:</span> <span class="n">batch</span><span class="p">}}</span>
    <span class="p">)</span>

    <span class="c1"># The 1st iteration is to measure the compilation time without engine caching</span>
    <span class="c1"># The 2nd and 3rd iterations are to measure the compilation time with engine caching.</span>
    <span class="c1"># Since the 2nd iteration needs to compile and save the engine, it will be slower than the 1st iteration.</span>
    <span class="c1"># The 3rd iteration should be faster than the 1st iteration because it loads the cached engine.</span>
    <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
        <span class="n">inputs</span> <span class="o">=</span> <span class="p">[</span><span class="n">torch</span><span class="o">.</span><span class="n">rand</span><span class="p">((</span><span class="mi">100</span> <span class="o">+</span> <span class="n">i</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">224</span><span class="p">,</span> <span class="mi">224</span><span class="p">))</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="s2">&quot;cuda&quot;</span><span class="p">)]</span>
        <span class="n">remove_timing_cache</span><span class="p">()</span>  <span class="c1"># remove timing cache just for engine caching measurement</span>
        <span class="k">if</span> <span class="n">i</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
            <span class="n">cache_built_engines</span> <span class="o">=</span> <span class="kc">False</span>
            <span class="n">reuse_cached_engines</span> <span class="o">=</span> <span class="kc">False</span>
        <span class="k">else</span><span class="p">:</span>
            <span class="n">cache_built_engines</span> <span class="o">=</span> <span class="kc">True</span>
            <span class="n">reuse_cached_engines</span> <span class="o">=</span> <span class="kc">True</span>

        <span class="n">start</span><span class="o">.</span><span class="n">record</span><span class="p">()</span>
        <span class="n">trt_gm</span> <span class="o">=</span> <span class="n">torch_trt</span><span class="o">.</span><span class="n">dynamo</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span>
            <span class="n">exp_program</span><span class="p">,</span>
            <span class="nb">tuple</span><span class="p">(</span><span class="n">inputs</span><span class="p">),</span>
            <span class="n">use_python_runtime</span><span class="o">=</span><span class="n">use_python_runtime</span><span class="p">,</span>
            <span class="n">enabled_precisions</span><span class="o">=</span><span class="n">enabled_precisions</span><span class="p">,</span>
            <span class="n">min_block_size</span><span class="o">=</span><span class="n">min_block_size</span><span class="p">,</span>
            <span class="n">immutable_weights</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
            <span class="n">cache_built_engines</span><span class="o">=</span><span class="n">cache_built_engines</span><span class="p">,</span>
            <span class="n">reuse_cached_engines</span><span class="o">=</span><span class="n">reuse_cached_engines</span><span class="p">,</span>
            <span class="n">engine_cache_size</span><span class="o">=</span><span class="mi">1</span> <span class="o">&lt;&lt;</span> <span class="mi">30</span><span class="p">,</span>  <span class="c1"># 1GB</span>
        <span class="p">)</span>
        <span class="c1"># output = trt_gm(*inputs)</span>
        <span class="n">end</span><span class="o">.</span><span class="n">record</span><span class="p">()</span>
        <span class="n">torch</span><span class="o">.</span><span class="n">cuda</span><span class="o">.</span><span class="n">synchronize</span><span class="p">()</span>
        <span class="n">times</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">start</span><span class="o">.</span><span class="n">elapsed_time</span><span class="p">(</span><span class="n">end</span><span class="p">))</span>

    <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;----------------dynamo_compile----------------&quot;</span><span class="p">)</span>
    <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;disable engine caching, used:&quot;</span><span class="p">,</span> <span class="n">times</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="s2">&quot;ms&quot;</span><span class="p">)</span>
    <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;enable engine caching to cache engines, used:&quot;</span><span class="p">,</span> <span class="n">times</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span> <span class="s2">&quot;ms&quot;</span><span class="p">)</span>
    <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;enable engine caching to reuse engines, used:&quot;</span><span class="p">,</span> <span class="n">times</span><span class="p">[</span><span class="mi">2</span><span class="p">],</span> <span class="s2">&quot;ms&quot;</span><span class="p">)</span>


<span class="n">dynamo_compile</span><span class="p">()</span>
</pre></div>
</div>
</section>
<section id="custom-engine-cache">
<h2>Custom Engine Cache<a class="headerlink" href="#custom-engine-cache" title="Permalink to this heading">¶</a></h2>
<p>By default, the engine cache is stored in the system’s temporary directory. Both the cache directory and
size limit can be customized by passing <code class="docutils literal notranslate"><span class="pre">engine_cache_dir</span></code> and <code class="docutils literal notranslate"><span class="pre">engine_cache_size</span></code>.
Users can also define their own engine cache implementation by extending the <code class="docutils literal notranslate"><span class="pre">BaseEngineCache</span></code> class.
This allows for remote or shared caching if so desired.</p>
<dl class="simple">
<dt>The custom engine cache should implement the following methods:</dt><dd><ul class="simple">
<li><p><code class="docutils literal notranslate"><span class="pre">save</span></code>: Save the engine blob to the cache.</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">load</span></code>: Load the engine blob from the cache.</p></li>
</ul>
</dd>
</dl>
<p>The hash provided by the cache system is a weight-agnostic hash of the originating PyTorch subgraph (post lowering).
The blob contains a serialized engine, calling spec data, and weight map information in the pickle format.</p>
<p>Below is an example of a custom engine cache implementation that implements a <code class="docutils literal notranslate"><span class="pre">RAMEngineCache</span></code>.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">class</span><span class="w"> </span><span class="nc">RAMEngineCache</span><span class="p">(</span><span class="n">BaseEngineCache</span><span class="p">):</span>
    <span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span>
        <span class="bp">self</span><span class="p">,</span>
    <span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="w">        </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd">        Constructs a user held engine cache in memory.</span>
<span class="sd">        &quot;&quot;&quot;</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">engine_cache</span><span class="p">:</span> <span class="n">Dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">bytes</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span>

    <span class="k">def</span><span class="w"> </span><span class="nf">save</span><span class="p">(</span>
        <span class="bp">self</span><span class="p">,</span>
        <span class="nb">hash</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span>
        <span class="n">blob</span><span class="p">:</span> <span class="nb">bytes</span><span class="p">,</span>
    <span class="p">):</span>
<span class="w">        </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd">        Insert the engine blob to the cache.</span>

<span class="sd">        Args:</span>
<span class="sd">            hash (str): The hash key to associate with the engine blob.</span>
<span class="sd">            blob (bytes): The engine blob to be saved.</span>

<span class="sd">        Returns:</span>
<span class="sd">            None</span>
<span class="sd">        &quot;&quot;&quot;</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">engine_cache</span><span class="p">[</span><span class="nb">hash</span><span class="p">]</span> <span class="o">=</span> <span class="n">blob</span>

    <span class="k">def</span><span class="w"> </span><span class="nf">load</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="nb">hash</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">bytes</span><span class="p">]:</span>
<span class="w">        </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd">        Load the engine blob from the cache.</span>

<span class="sd">        Args:</span>
<span class="sd">            hash (str): The hash key of the engine to load.</span>

<span class="sd">        Returns:</span>
<span class="sd">            Optional[bytes]: The engine blob if found, None otherwise.</span>
<span class="sd">        &quot;&quot;&quot;</span>
        <span class="k">if</span> <span class="nb">hash</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">engine_cache</span><span class="p">:</span>
            <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">engine_cache</span><span class="p">[</span><span class="nb">hash</span><span class="p">]</span>
        <span class="k">else</span><span class="p">:</span>
            <span class="k">return</span> <span class="kc">None</span>


<span class="k">def</span><span class="w"> </span><span class="nf">torch_compile_my_cache</span><span class="p">(</span><span class="n">iterations</span><span class="o">=</span><span class="mi">3</span><span class="p">):</span>
    <span class="n">times</span> <span class="o">=</span> <span class="p">[]</span>
    <span class="n">engine_cache</span> <span class="o">=</span> <span class="n">RAMEngineCache</span><span class="p">()</span>
    <span class="n">start</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">cuda</span><span class="o">.</span><span class="n">Event</span><span class="p">(</span><span class="n">enable_timing</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
    <span class="n">end</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">cuda</span><span class="o">.</span><span class="n">Event</span><span class="p">(</span><span class="n">enable_timing</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>

    <span class="c1"># The 1st iteration is to measure the compilation time without engine caching</span>
    <span class="c1"># The 2nd and 3rd iterations are to measure the compilation time with engine caching.</span>
    <span class="c1"># Since the 2nd iteration needs to compile and save the engine, it will be slower than the 1st iteration.</span>
    <span class="c1"># The 3rd iteration should be faster than the 1st iteration because it loads the cached engine.</span>
    <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">iterations</span><span class="p">):</span>
        <span class="n">inputs</span> <span class="o">=</span> <span class="p">[</span><span class="n">torch</span><span class="o">.</span><span class="n">rand</span><span class="p">((</span><span class="mi">100</span><span class="p">,</span> <span class="mi">3</span><span class="p">,</span> <span class="mi">224</span><span class="p">,</span> <span class="mi">224</span><span class="p">))</span><span class="o">.</span><span class="n">to</span><span class="p">(</span><span class="s2">&quot;cuda&quot;</span><span class="p">)]</span>
        <span class="c1"># remove timing cache and reset dynamo just for engine caching measurement</span>
        <span class="n">remove_timing_cache</span><span class="p">()</span>
        <span class="n">torch</span><span class="o">.</span><span class="n">_dynamo</span><span class="o">.</span><span class="n">reset</span><span class="p">()</span>

        <span class="k">if</span> <span class="n">i</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
            <span class="n">cache_built_engines</span> <span class="o">=</span> <span class="kc">False</span>
            <span class="n">reuse_cached_engines</span> <span class="o">=</span> <span class="kc">False</span>
        <span class="k">else</span><span class="p">:</span>
            <span class="n">cache_built_engines</span> <span class="o">=</span> <span class="kc">True</span>
            <span class="n">reuse_cached_engines</span> <span class="o">=</span> <span class="kc">True</span>

        <span class="n">start</span><span class="o">.</span><span class="n">record</span><span class="p">()</span>
        <span class="n">compiled_model</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span>
            <span class="n">model</span><span class="p">,</span>
            <span class="n">backend</span><span class="o">=</span><span class="s2">&quot;tensorrt&quot;</span><span class="p">,</span>
            <span class="n">options</span><span class="o">=</span><span class="p">{</span>
                <span class="s2">&quot;use_python_runtime&quot;</span><span class="p">:</span> <span class="kc">True</span><span class="p">,</span>
                <span class="s2">&quot;enabled_precisions&quot;</span><span class="p">:</span> <span class="n">enabled_precisions</span><span class="p">,</span>
                <span class="s2">&quot;min_block_size&quot;</span><span class="p">:</span> <span class="n">min_block_size</span><span class="p">,</span>
                <span class="s2">&quot;immutable_weights&quot;</span><span class="p">:</span> <span class="kc">False</span><span class="p">,</span>
                <span class="s2">&quot;cache_built_engines&quot;</span><span class="p">:</span> <span class="n">cache_built_engines</span><span class="p">,</span>
                <span class="s2">&quot;reuse_cached_engines&quot;</span><span class="p">:</span> <span class="n">reuse_cached_engines</span><span class="p">,</span>
                <span class="s2">&quot;custom_engine_cache&quot;</span><span class="p">:</span> <span class="n">engine_cache</span><span class="p">,</span>
            <span class="p">},</span>
        <span class="p">)</span>
        <span class="k">with</span> <span class="n">torch</span><span class="o">.</span><span class="n">no_grad</span><span class="p">():</span>
            <span class="n">compiled_model</span><span class="p">(</span><span class="o">*</span><span class="n">inputs</span><span class="p">)</span>  <span class="c1"># trigger the compilation</span>
        <span class="n">end</span><span class="o">.</span><span class="n">record</span><span class="p">()</span>
        <span class="n">torch</span><span class="o">.</span><span class="n">cuda</span><span class="o">.</span><span class="n">synchronize</span><span class="p">()</span>
        <span class="n">times</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">start</span><span class="o">.</span><span class="n">elapsed_time</span><span class="p">(</span><span class="n">end</span><span class="p">))</span>

    <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;----------------torch_compile----------------&quot;</span><span class="p">)</span>
    <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;disable engine caching, used:&quot;</span><span class="p">,</span> <span class="n">times</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="s2">&quot;ms&quot;</span><span class="p">)</span>
    <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;enable engine caching to cache engines, used:&quot;</span><span class="p">,</span> <span class="n">times</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span> <span class="s2">&quot;ms&quot;</span><span class="p">)</span>
    <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;enable engine caching to reuse engines, used:&quot;</span><span class="p">,</span> <span class="n">times</span><span class="p">[</span><span class="mi">2</span><span class="p">],</span> <span class="s2">&quot;ms&quot;</span><span class="p">)</span>


<span class="n">torch_compile_my_cache</span><span class="p">()</span>
</pre></div>
</div>
<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 0 minutes  0.000 seconds)</p>
<div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-tutorials-rendered-examples-dynamo-engine-caching-example-py">
<div class="sphx-glr-download sphx-glr-download-python docutils container">
<p><a class="reference download internal" download="" href="../../../_downloads/1c759c0181fe2845e5579cc82e5b7a7a/engine_caching_example.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">engine_caching_example.py</span></code></a></p>
</div>
<div class="sphx-glr-download sphx-glr-download-jupyter docutils container">
<p><a class="reference download internal" download="" href="../../../_downloads/3454ee6d4b68e83cdf0c757f0059986b/engine_caching_example.ipynb"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Jupyter</span> <span class="pre">notebook:</span> <span class="pre">engine_caching_example.ipynb</span></code></a></p>
</div>
</div>
<p class="sphx-glr-signature"><a class="reference external" href="https://sphinx-gallery.github.io">Gallery generated by Sphinx-Gallery</a></p>
</section>
</section>


             </article>
             
            </div>
            <footer>
  
    <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
      
        <a href="engine_caching_bert_example.html" class="btn btn-neutral float-right" title="Engine Caching (BERT)" accesskey="n" rel="next">Next <img src="../../../_static/images/chevron-right-orange.svg" class="next-page" alt=""></a>
      
      
        <a href="vgg16_ptq.html" class="btn btn-neutral" title="Deploy Quantized Models using Torch-TensorRT" accesskey="p" rel="prev"><img src="../../../_static/images/chevron-right-orange.svg" class="previous-page" alt=""> Previous</a>
      
    </div>
  

  

    <hr>

  

  <div role="contentinfo">
    <p>
        &copy; Copyright 2024, NVIDIA Corporation.

    </p>
  </div>
    
      <div>
        Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
      </div>
     

</footer>

          </div>
        </div>

        <!-- Right-hand menu: in-page anchor links for this document. -->
        <div class="pytorch-content-right" id="pytorch-content-right">
          <div class="pytorch-right-menu" id="pytorch-right-menu">
            <div class="pytorch-side-scroll" id="pytorch-side-scroll-right">
              <ul>
                <li><a class="reference internal" href="#">Engine Caching</a><ul>
                    <li><a class="reference internal" href="#engine-caching-for-jit-compilation">Engine Caching for JIT Compilation</a></li>
                    <li><a class="reference internal" href="#engine-caching-for-aot-compilation">Engine Caching for AOT Compilation</a></li>
                    <li><a class="reference internal" href="#custom-engine-cache">Custom Engine Cache</a></li>
                  </ul>
                </li>
              </ul>
            </div>
          </div>
        </div>
      </section>
    </div>

  


  

     
       <!-- Sphinx runtime scripts, in load order. documentation_options.js is
            included exactly once: a second tag with the same id would create a
            duplicate id="documentation_options" and load the same file again. -->
       <script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js"></script>
       <script src="../../../_static/jquery.js"></script>
       <script src="../../../_static/underscore.js"></script>
       <script src="../../../_static/_sphinx_javascript_frameworks_compat.js"></script>
       <script src="../../../_static/doctools.js"></script>
       <script src="../../../_static/collapsible-lists/js/CollapsibleLists.compressed.js"></script>
       <script src="../../../_static/collapsible-lists/js/apply-collapsible-lists.js"></script>
       <!-- Third-party CDN script, pinned with Subresource Integrity. -->
       <script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
     

  

  <!-- Vendor libraries followed by the theme script; classic scripts run in document order. -->
  <script src="../../../_static/js/vendor/popper.min.js"></script>
  <script src="../../../_static/js/vendor/bootstrap.min.js"></script>
  <script src="https://cdnjs.cloudflare.com/ajax/libs/list.js/1.5.0/list.min.js"></script>
  <script src="../../../_static/js/theme.js"></script>

  <script>
    // When the DOM is ready, switch on the theme navigation.
    // The `true` argument is handed straight to SphinxRtdTheme.Navigation.enable.
    jQuery(function () {
      SphinxRtdTheme.Navigation.enable(true);
    });
  </script>

  <!-- Begin Footer -->

  <!-- Three link panels (Docs, Tutorials, Resources); each has a heading, a
       one-line description and a link to the matching pytorch.org page. -->
  <div class="container-fluid docs-tutorials-resources" id="docs-tutorials-resources">
    <div class="container">
      <div class="row">
        <div class="col-md-4 text-center">
          <h2>Docs</h2>
          <p>Access comprehensive developer documentation for PyTorch</p>
          <a class="with-right-arrow" href="https://pytorch.org/docs/stable/index.html">View Docs</a>
        </div>

        <div class="col-md-4 text-center">
          <h2>Tutorials</h2>
          <p>Get in-depth tutorials for beginners and advanced developers</p>
          <a class="with-right-arrow" href="https://pytorch.org/tutorials">View Tutorials</a>
        </div>

        <div class="col-md-4 text-center">
          <h2>Resources</h2>
          <p>Find development resources and get your questions answered</p>
          <a class="with-right-arrow" href="https://pytorch.org/resources">View Resources</a>
        </div>
      </div>
    </div>
  </div>

  <footer class="site-footer">
    <div class="container footer-container">
      <!-- The logo link has no text content, so it is named with aria-label,
           matching the header logo link in the mobile menu. -->
      <div class="footer-logo-wrapper">
        <a href="https://pytorch.org/" class="footer-logo" aria-label="PyTorch"></a>
      </div>

      <!-- Link columns. Every link that opens a new tab carries rel="noopener"
           so the opened page gets no window.opener handle, including in
           browsers that do not imply it for target="_blank". -->
      <div class="footer-links-wrapper">
        <div class="footer-links-col">
          <ul>
            <li class="list-title"><a href="https://pytorch.org/">PyTorch</a></li>
            <li><a href="https://pytorch.org/get-started">Get Started</a></li>
            <li><a href="https://pytorch.org/features">Features</a></li>
            <li><a href="https://pytorch.org/ecosystem">Ecosystem</a></li>
            <li><a href="https://pytorch.org/blog/">Blog</a></li>
            <li><a href="https://github.com/pytorch/pytorch/blob/master/CONTRIBUTING.md">Contributing</a></li>
          </ul>
        </div>

        <div class="footer-links-col">
          <ul>
            <li class="list-title"><a href="https://pytorch.org/resources">Resources</a></li>
            <li><a href="https://pytorch.org/tutorials">Tutorials</a></li>
            <li><a href="https://pytorch.org/docs/stable/index.html">Docs</a></li>
            <li><a href="https://discuss.pytorch.org" target="_blank" rel="noopener">Discuss</a></li>
            <li><a href="https://github.com/pytorch/pytorch/issues" target="_blank" rel="noopener">Github Issues</a></li>
            <li><a href="https://pytorch.org/assets/brand-guidelines/PyTorch-Brand-Guidelines.pdf" target="_blank" rel="noopener">Brand Guidelines</a></li>
          </ul>
        </div>

        <div class="footer-links-col">
          <ul>
            <li class="list-title">Stay up to date</li>
            <li><a href="https://www.facebook.com/pytorch" target="_blank" rel="noopener">Facebook</a></li>
            <li><a href="https://twitter.com/pytorch" target="_blank" rel="noopener">Twitter</a></li>
            <li><a href="https://www.youtube.com/pytorch" target="_blank" rel="noopener">YouTube</a></li>
            <li><a href="https://www.linkedin.com/company/pytorch" target="_blank" rel="noopener">LinkedIn</a></li>
          </ul>
        </div>

        <div class="footer-links-col">
          <ul>
            <li class="list-title">PyTorch Podcasts</li>
            <li><a href="https://open.spotify.com/show/6UzHKeiy368jKfQMKKvJY5" target="_blank" rel="noopener">Spotify</a></li>
            <li><a href="https://podcasts.apple.com/us/podcast/pytorch-developer-podcast/id1566080008" target="_blank" rel="noopener">Apple</a></li>
            <li><a href="https://www.google.com/podcasts?feed=aHR0cHM6Ly9mZWVkcy5zaW1wbGVjYXN0LmNvbS9PQjVGa0lsOA%3D%3D" target="_blank" rel="noopener">Google</a></li>
            <li><a href="https://music.amazon.com/podcasts/7a4e6f0e-26c2-49e9-a478-41bd244197d0/PyTorch-Developer-Podcast?" target="_blank" rel="noopener">Amazon</a></li>
          </ul>
        </div>
      </div>

      <!-- Legal links. -->
      <div class="privacy-policy">
        <ul>
          <li class="privacy-policy-links"><a href="https://www.linuxfoundation.org/terms/" target="_blank" rel="noopener">Terms</a></li>
          <li class="privacy-policy-links">|</li>
          <li class="privacy-policy-links"><a href="https://www.linuxfoundation.org/privacy-policy/" target="_blank" rel="noopener">Privacy</a></li>
        </ul>
      </div>

      <!-- Site-wide copyright and policy notice. -->
      <div class="copyright">
        <p>© Copyright The Linux Foundation. The PyTorch Foundation is a project of The Linux Foundation.
          For web site terms of use, trademark policy and other policies applicable to The PyTorch Foundation please see
          <a href="https://www.linuxfoundation.org/policies/">www.linuxfoundation.org/policies/</a>. The PyTorch Foundation supports the PyTorch open source
          project, which has been established as PyTorch Project a Series of LF Projects, LLC. For policies applicable to the PyTorch Project a Series of LF Projects, LLC,
          please see <a href="https://www.lfprojects.org/policies/">www.lfprojects.org/policies/</a>.</p>
      </div>
    </div>

  </footer>

  <!-- Cookie consent notice. The close image is a control, not decoration, so
       its alt text names the action instead of being left out. -->
  <div class="cookie-banner-wrapper">
    <div class="container">
      <p class="gdpr-notice">To analyze traffic and optimize your experience, we serve cookies on this site. By clicking or navigating, you agree to allow our usage of cookies. As the current maintainers of this site, Facebook’s Cookies Policy applies. Learn more, including about available controls: <a href="https://www.facebook.com/policies/cookies/">Cookies Policy</a>.</p>
      <img class="close-button" src="../../../_static/images/pytorch-x.svg" alt="Close cookie notice">
    </div>
  </div>

  <!-- End Footer -->

  <!-- Begin Mobile Menu -->

  <div class="mobile-main-menu">
    <!-- Mobile menu header: logo link plus the close control. Both anchors
         have no text content, so each is named with aria-label. -->
    <div class="container-fluid">
      <div class="container">
        <div class="mobile-main-menu-header-container">
          <a class="header-logo" href="https://pytorch.org/" aria-label="PyTorch"></a>
          <a class="main-menu-close-button" href="#" data-behavior="close-mobile-menu" aria-label="Close menu"></a>
        </div>
      </div>
    </div>

    <!-- Mobile menu links. Each group is a title item
         (li.resources-mobile-menu-title) immediately followed by a sibling
         list of its links (ul.resources-mobile-menu-items).
         NOTE(review): a <ul> directly inside a <ul> is outside the HTML content
         model; the theme's script and styles presumably depend on this sibling
         layout, so confirm against them before restructuring. -->
    <div class="mobile-main-menu-links-container">
      <div class="main-menu">
        <ul>
          <li class="resources-mobile-menu-title"><a>Learn</a></li>
          <ul class="resources-mobile-menu-items">
            <li><a href="https://pytorch.org/get-started">Get Started</a></li>
            <li><a href="https://pytorch.org/tutorials">Tutorials</a></li>
            <li><a href="https://pytorch.org/tutorials/beginner/basics/intro.html">Learn the Basics</a></li>
            <li><a href="https://pytorch.org/tutorials/recipes/recipes_index.html">PyTorch Recipes</a></li>
            <li><a href="https://pytorch.org/tutorials/beginner/introyt.html">Introduction to PyTorch - YouTube Series</a></li>
          </ul>

          <li class="resources-mobile-menu-title"><a>Ecosystem</a></li>
          <ul class="resources-mobile-menu-items">
            <li><a href="https://pytorch.org/ecosystem">Tools</a></li>
            <li><a href="https://pytorch.org/#community-module">Community</a></li>
            <li><a href="https://discuss.pytorch.org/">Forums</a></li>
            <li><a href="https://pytorch.org/resources">Developer Resources</a></li>
            <!-- NOTE(review): the link text says 2024 but the URL path says 2023; confirm the intended target. -->
            <li><a href="https://pytorch.org/ecosystem/contributor-awards-2023">Contributor Awards - 2024</a></li>
          </ul>

          <li class="resources-mobile-menu-title"><a>Edge</a></li>
          <ul class="resources-mobile-menu-items">
            <li><a href="https://pytorch.org/edge">About PyTorch Edge</a></li>
            <li><a href="https://pytorch.org/executorch-overview">ExecuTorch</a></li>
            <li><a href="https://pytorch.org/executorch/stable/index.html">ExecuTorch Documentation</a></li>
          </ul>

          <li class="resources-mobile-menu-title"><a>Docs</a></li>
          <ul class="resources-mobile-menu-items">
            <li><a href="https://pytorch.org/docs/stable/index.html">PyTorch</a></li>
            <li><a href="https://pytorch.org/pytorch-domains">PyTorch Domains</a></li>
          </ul>

          <li class="resources-mobile-menu-title"><a>Blog & News</a></li>
          <ul class="resources-mobile-menu-items">
            <li><a href="https://pytorch.org/blog/">PyTorch Blog</a></li>
            <li><a href="https://pytorch.org/community-blog">Community Blog</a></li>
            <li><a href="https://pytorch.org/videos">Videos</a></li>
            <li><a href="https://pytorch.org/community-stories">Community Stories</a></li>
            <li><a href="https://pytorch.org/events">Events</a></li>
            <li><a href="https://pytorch.org/newsletter">Newsletter</a></li>
          </ul>

          <li class="resources-mobile-menu-title"><a>About</a></li>
          <ul class="resources-mobile-menu-items">
            <li><a href="https://pytorch.org/foundation">PyTorch Foundation</a></li>
            <li><a href="https://pytorch.org/governing-board">Governing Board</a></li>
            <li><a href="https://pytorch.org/credits">Cloud Credit Program</a></li>
            <li><a href="https://pytorch.org/tac">Technical Advisory Council</a></li>
            <li><a href="https://pytorch.org/staff">Staff</a></li>
            <li><a href="https://pytorch.org/contact-us">Contact Us</a></li>
          </ul>
        </ul>
      </div>
    </div>
  </div>

  <!-- End Mobile Menu -->

  <script src="../../../_static/js/vendor/anchor.min.js"></script>

  <script>
    // Page bootstrap: once the DOM is ready, bind each theme module and tag
    // links that wrap inline code.
    //
    // Uses the jQuery(handler) form, the same one the navigation init script
    // earlier on this page uses, instead of the deprecated
    // $(document).ready(handler). jQuery passes itself to the handler, so `$`
    // below is that jQuery instance even if the global `$` has been reassigned.
    jQuery(function ($) {
      mobileMenu.bind();
      mobileTOC.bind();
      pytorchAnchors.bind();
      sideMenus.bind();
      scrollToAnchor.bind();
      highlightNavigation.bind();
      mainMenuDropdown.bind();
      filterTags.bind();

      // Add class to links that have code blocks, since we cannot create links in code blocks
      $("article.pytorch-article a span.pre").each(function () {
        $(this).closest("a").addClass("has-code");
      });
    });
  </script>
</body>
</html>